#coding:utf-8 
 
 
import urllib2
from lxml import etree

# Download the Baidu home page and collect the text of every <a> element that
# is a direct child of the <div id="u1"> element.
response = urllib2.urlopen("http://www.baidu.com")
try:
    html = response.read()
finally:
    # Python 2 urllib2 responses are not context managers, so release the
    # underlying socket explicitly, even if read() raises.
    response.close()

# etree.HTML parses an HTML string and returns the root element of the tree.
tree = etree.HTML(html)

# An XPath expression ending in text() already yields a plain list of strings.
# lxml results have no .extract() method (that is Scrapy's Selector API), so
# the previous call raised AttributeError on the returned list.
text = tree.xpath('//div[@id="u1"]/a/text()')


